###########################################
#  Primary Divisions: How Voters Evaluate Policy and Group Differences in Intra-Party Contests
#   - Forthcoming at The Journal of Politics
#   - Henderson et al 2021
#
###########################################
#  - code by S. Goggin & J. Henderson
########################################################
# This file produces main primary choice results for Figure 2, plus some additional ancillary results for outparty comparisons
########################################################
# inputs :: data/cces_stacked_unmatched.Rdata
#           data/data_matrix_scored.Rdata
#           data/core_for_ggplot_global.csv  (row order for the figure)

# outputs ::
# -- Figure 2: 'Impact of Candidate Attributes on Support in Primary Election Settings'
# => main/figures/choice_inparty_twoparty_separated.pdf

# -- Supplementary Figures :: outparty results
# => main/figures/choice_outparty_twoparty.pdf

#write.csv(
#	list('n.rep'=nrow(reps),
# 		'n.rep.cluster'=length(unique(reps$respondent)),
# 		'n.dem'=nrow(dems),
# 		'n.dem.cluster'=length(unique(dems$respondent))
#	),
#	file='main/figures/choice_inparty_twoparty.csv'
#)

#write.csv(
#	list('n.rep'=nrow(reps),
# 		'n.rep.cluster'=length(unique(reps$respondent)),
# 		'n.dem'=nrow(dems),
# 		'n.dem.cluster'=length(unique(dems$respondent))
#	),
#	file='main/figures/choice_outparty_twoparty.csv'
#)

# `dirs` should be set here or in runR.R, e.g.
#dirs="~/Dropbox/replication0/"

# Clear the workspace, keeping only `dirs`.
rm(list = setdiff(ls(), "dirs"))

library(ggplot2)
library(stringr)

# Reorder the rows of `xmat` within each label group by an estimate column,
# leaving omitted (baseline) rows where they are.
#
# Arguments:
#   xmat         matrix or data frame with one row per label.
#   labels       label of each row of `xmat`.
#   label.groups group of each row; rows are only permuted within a group.
#   omits        labels of the rows that must keep their position.
#   o.column     column of `xmat` (index or name) holding the sort values.
#
# Returns `xmat` with rows permuted. Groups with two or fewer rows are left
# untouched, as are rows whose label is in `omits`.
lableOrder <- function(xmat, labels, label.groups, omits, o.column) {

  # Mark omitted rows by exact label match. A logical mask is used rather
  # than rewriting the labels and pattern-matching on "omit", so a genuine
  # label that happens to contain "omit" is still sorted, and factor labels
  # and an empty `omits` are handled.
  is_omitted <- !is.na(labels) & (labels %in% omits)

  # values to sort on, typically the global or independent estimate
  sort_key <- xmat[, o.column]

  # row order to return; starts as the identity permutation
  row_order <- seq_along(sort_key)

  # permute the non-omitted rows inside each group
  for (grp in unique(label.groups)) {
    members <- which(label.groups == grp)
    if (length(members) > 2) {
      movable <- members[!is_omitted[members]]
      row_order[movable] <- movable[order(sort_key[movable])]
    }
  }

  xmat[row_order, ]
}

# Return the rows of `x` arranged in the order given by `o`.
#
# Arguments:
#   x  data frame with an `iv_order` column identifying each row.
#   o  vector of `iv_order` values in the desired row order.
#
# Returns `x[ix, ]`, where `ix` is the position in `x$iv_order` of each
# element of `o`; the first match is used if a value is duplicated.
# Stops with an explicit message when a value of `o` is not in `x$iv_order`.
reOrder <- function(x, o) {
  # compare as character so factor inputs with different level sets work
  keys <- as.character(x$iv_order)
  wanted <- as.character(o)

  ix <- match(wanted, keys)
  if (anyNA(ix)) {
    stop(
      "reOrder: value(s) of `o` not found in x$iv_order: ",
      paste(unique(wanted[is.na(ix)]), collapse = ", "),
      call. = FALSE
    )
  }

  # one row per element of `o`; no NA padding rows when `o` is shorter than x
  x[ix, ]
}

# Stacked CCES data.
load("data/cces_stacked_unmatched.Rdata")

###########################################
### First, need to stack based on candidates, not just candidate pairs
### (and also get text out for labels later)

# The recode below treats leaners as independents, which is incorrect,
# so it is left disabled:
#cces_stacked$pid3clean <- ifelse(cces_stacked$pid3=="Democrat",-1,ifelse(cces_stacked$pid3=="Republican",1,0))

library(car)


# Scored data matrix; the models below are fit on `candidate_matrix`.
load('data/data_matrix_scored.Rdata')
candidate_matrix <- data_matrix

###########################################
###Then, produce models w/ Clustered SEs

#Function from: http://scholar.byu.edu/jgubler/book/clustered-standard-errors-r
#Need this for clustered standard errors
#
# Coefficient table for a fitted model using a cluster-robust covariance.
#
# Arguments:
#   dat      data the model was fit on. Kept for backward compatibility with
#            existing calls; the computation no longer evaluates inside it.
#   fm       fitted model (e.g. from lm()).
#   cluster  cluster id for each row of the data passed to the model.
#
# Returns the lmtest::coeftest() table of `fm` under the clustered covariance.
# Attaches the sandwich and lmtest packages and stops if either is missing.
clse.f <- function(dat, fm, cluster) {
  # require() only returns FALSE when a package is missing; fail here with a
  # clear message instead of a later "could not find function" error.
  for (pkg in c("sandwich", "lmtest")) {
    if (!require(pkg, character.only = TRUE)) {
      stop("clse.f() needs the '", pkg, "' package to be installed", call. = FALSE)
    }
  }

  # drop cluster ids of rows the model discarded because of missing values
  not <- attr(fm$model, "na.action")
  if (!is.null(not)) {
    cluster <- cluster[-not]
  }

  # estfun is :: residuals(fm) * X
  scores <- estfun(fm)
  if (length(cluster) != nrow(scores)) {
    stop("clse.f(): `cluster` must have one entry per row used to fit `fm`",
         call. = FALSE)
  }

  # The computation runs in the function's own frame rather than inside
  # with(dat, ...), so a column of `dat` named e.g. `cluster` or `fm` cannot
  # shadow the arguments.
  N <- length(cluster)
  K <- fm$rank #doF
  # the cluster-count factor is left disabled, as in the original code:
  #dfc <- (M/(M-1))*((N-1)/(N-K))   # with M <- length(unique(cluster))
  dfc <- ((N - 1) / (N - K))

  # summing estimating function to the cluster || i.e., summing residuals to
  # the cluster level over units in cluster
  uj <- apply(scores, 2, function(x) tapply(x, cluster, sum))
  vcovCL <- dfc * sandwich(fm, meat = crossprod(uj) / N)
  coeftest(fm, vcovCL)
}

############################################################
############################################################
###Now for Primary Elections (by PID)

# Keep the rows with conjoints 3 or 8 and code pty as 0 for conjoint 3 and
# 1 for conjoint 8.
vote <- subset(candidate_matrix, conjoints == 3 | conjoints == 8)
vote$pty <- ifelse(vote$conjoints == 3, 0, 1)

# Split by respondent party id (pid3: 1, -1, 0).
reps <- subset(vote, pid3 == 1)
dems <- subset(vote, pid3 == -1)
inds <- subset(vote, pid3 == 0)


#dv_choice is the vote choice variable
# pty is included for independents here, otherwise of course NAs out.

# One specification shared by all three party-id subsets. It is written once
# so the three fits cannot drift apart.
primary_choice_formula <- dv_choice ~
  pty + g_female +
  re_black + re_hispanic +
  r_catholic + r_evangelical + r_protestant +
  o_ceo + o_citycouncil + o_factoryforeman + o_farmer + o_usarmymajor +
  o_politicalstaffer + o_smallbizowner + o_stateleg + o_teacher +
  p_compassionate + p_empathetic + p_inspiring + p_intelligent +
  p_knowledgeable + p_moral + p_strongleader +
  e_business + e_christian + e_civilrights + e_energy + e_environment +
  e_guncontrol + e_gunrights + e_laborunions + e_reproductive + e_taxreform +
  e_teaparty + e_veterans +
  rec_refuse + rec_secure + rec_stand + rec_work +
  i_raisetaxes + i_cuttaxes + i_lgbt + i_marriage + i_drilling + i_need +
  i_govabuse + i_righttochoose + i_gunrights + i_unfairtrade + i_unbornlives +
  i_citizenship + i_reducemilitary + i_policing + i_co2emissions +
  i_bordersecurity + i_guncontrol + i_strengthenmilitary + i_criminals

# Each model is fit with an explicit `data=` argument instead of
# attach()/detach(), so variables are always taken from the intended subset
# and cannot be masked by same-named objects in the workspace. This matches
# how the ideology models later in the file are fit.
# Observation weights are 1/wt; clustered SEs are clustered on respondent.
primary_elec_reps <- lm(primary_choice_formula, data = reps, weights = 1 / wt)
primary_elec_reps_clse <- clse.f(reps, primary_elec_reps, reps$respondent)

primary_elec_dems <- lm(primary_choice_formula, data = dems, weights = 1 / wt)
primary_elec_dems_clse <- clse.f(dems, primary_elec_dems, dems$respondent)

primary_elec_inds <- lm(primary_choice_formula, data = inds, weights = 1 / wt)
primary_elec_inds_clse <- clse.f(inds, primary_elec_inds, inds$respondent)


# Assemble estimates and clustered SEs for ggplot.

# Coefficients side by side (columns: Democrats, independents, Republicans),
# with the pty coefficient (row 2) removed.
coef_by_party <- cbind(
  primary_elec_dems$coefficients,
  primary_elec_inds$coefficients,
  primary_elec_reps$coefficients
)
coef_by_party <- coef_by_party[-2, ]

# Clustered SE for each remaining term, looked up by term name in a clse.f()
# table; terms absent from the table get NA.
clustered_se <- function(clse_table) {
  unname(clse_table[match(rownames(coef_by_party), rownames(clse_table)), 2])
}

results_matrix <- cbind(
  d_estimate = coef_by_party[, 1], d_se = clustered_se(primary_elec_dems_clse),
  i_estimate = coef_by_party[, 2], i_se = clustered_se(primary_elec_inds_clse),
  r_estimate = coef_by_party[, 3], r_se = clustered_se(primary_elec_reps_clse)
)

# Drop the first row (the intercept) and carry the term names along as a
# first column; the 57 remaining rows are numbered 1..57.
results_matrix <- results_matrix[2:58, ]
results_matrix_withnames <- cbind(var = rownames(results_matrix), results_matrix)
rownames(results_matrix_withnames) <- seq(1, 57, by = 1)

#results_matrix_withnames[is.na(results_matrix_withnames)]<- 0

## Insert all-zero rows for the omitted (baseline) level of each attribute.
# omitted: male, white, no religion, attorney, decent, newspaper
# endorsements, record = helping constituents, issue = free trade
baseline_row <- function(term) c(term, rep(0, 6))

g_male       <- baseline_row("g_male")
re_white     <- baseline_row("re_white")
r_none       <- baseline_row("r_none")
o_attorney   <- baseline_row("o_attorney")
p_decent     <- baseline_row("p_decent")
e_newspapers <- baseline_row("e_newspapers")
rec_help     <- baseline_row("rec_help")
#i_raisetaxes <- c("i1_raisetaxes",rep(0,6))
i_freetrade  <- baseline_row("i1_freetrade")

# Each baseline row goes directly above the estimated levels of its attribute.
full_matrix <- rbind(
  g_male,       results_matrix_withnames[1, ],
  re_white,     results_matrix_withnames[2:3, ],
  r_none,       results_matrix_withnames[4:6, ],
  o_attorney,   results_matrix_withnames[7:15, ],
  p_decent,     results_matrix_withnames[16:22, ],
  e_newspapers, results_matrix_withnames[23:34, ],
  rec_help,     results_matrix_withnames[35:38, ],
  i_freetrade,  results_matrix_withnames[39:57, ]
)

# Move the term names into the row names, then convert the remaining
# (character) columns back to numeric.
rownames(full_matrix) <- full_matrix[, 1]
full_matrix <- full_matrix[, -1]
full_matrix_clean <- apply(full_matrix, 2, as.numeric)
rownames(full_matrix_clean) <- rownames(full_matrix)

# Display label for each row of full_matrix_clean, in row order.
labels <- c(
  # gender
  "Gender - Male",
  "Gender - Female",
  # race
  "Race - White",
  "Race - Black",
  "Race - Hispanic",
  # religion
  "Religion - None",
  "Religion - Catholic",
  "Religion - Evangelical Protestant",
  "Religion - Protestant",
  # occupation
  "Occupation - Attorney",
  "Occupation - CEO",
  "Occupation - City Council Member",
  "Occupation - Factory Foreman",
  "Occupation - Farmer",
  "Occupation - Former US Army Major",
  "Occupation - Political Staffer",
  "Occupation - Small Business Owner",
  "Occupation - State Legislator",
  "Occupation - Teacher",
  # personality
  "Personality - Decent",
  "Personality - Compassionate",
  "Personality - Empathetic",
  "Personality - Inspiring",
  "Personality - Intelligent",
  "Personality - Knowledgeable",
  "Personality - Moral",
  "Personality - Strong Leader",
  # endorsements
  "Endorsements - Major area newspapers",
  "Endorsements - Business groups",
  "Endorsements - Christian groups",
  "Endorsements - Civil rights groups",
  "Endorsements - Energy groups",
  "Endorsements - Environmental groups",
  "Endorsements - Gun control groups",
  "Endorsements - Gun rights groups",
  "Endorsements - Labor unions",
  "Endorsements - Reproductive rights groups",
  "Endorsements - Tax reform groups",
  "Endorsements - Tea Party groups",
  "Endorsements - Veterans groups",
  # record
  "Record - Help my constituents get the benefits they deserve",
  "Record - Refuse to compromise my principles even when it means taking on my party",
  "Record - Secure appointment to a powerful legislative committee",
  "Record - Stand with my party to do what's right",
  "Record - Work across the aisle to get things done",
  # issues
  "Issue - Promote expanding free trade agreements",
  "Issue - Raise taxes on those making more than $250,000 a year",
  "Issue - Cut taxes on income and capital gains for all",
  "Issue - Defend the rights of LGBT individuals",
  "Issue - Defend traditional marriage and religious beliefs",
  "Issue - Expand domestic oil and gas production through drilling",
  "Issue - Expand government and unemployment assistance for those in need",
  "Issue - Prevent and prosecute abuse of government assistance programs",
  "Issue - Protect a woman's right to choose",
  "Issue - Protect gun owners' rights to defend themselves and others",
  "Issue - Protect jobs and industry from unfair foreign trade",
  "Issue - Protect the lives of the unborn",
  "Issue - Provide a path to citizenship for undocumented immigrants",
  "Issue - Reduce the size of military and number of military bases",
  "Issue - Reform policing and stop racial profiling",
  "Issue - Regulate CO2 emissions to combat global warming",
  "Issue - Strengthen border security to stop illegal immigration",
  "Issue - Strengthen gun control through commonsense restrictions",
  "Issue - Strengthen our military and national defense",
  "Issue - Toughen sentences and penalties for criminals"
)

# Labels plus estimates/SEs; iv_order is a factor whose levels follow the
# row order above.
core_for_ggplot <- data.frame(labels = labels, full_matrix_clean)
core_for_ggplot$iv_order <- factor(
  core_for_ggplot$labels,
  levels = as.character(core_for_ggplot$labels)
)

# Labels of the omitted (baseline) level of each attribute.
omits <- c(
  "Gender - Male",
  "Race - White",
  "Religion - None",
  "Occupation - Attorney",
  "Personality - Decent",
  "Endorsements - Major area newspapers",
  "Record - Help my constituents get the benefits they deserve",
  "Issue - Promote expanding free trade agreements"
)

# Attribute group = text of the label up to two characters before the first
# '-'. o.column = position of the column(s) whose name matches 'i_est'.
dash_pos <- str_locate(labels, pattern = '-')[, 1]
label.groups <- str_sub(labels, 1, dash_pos - 2)
o.column <- grep('i_est', names(core_for_ggplot))

#core_for_ggplot=lableOrder(xmat=core_for_ggplot,labels,label.groups,omits,o.column)
#write.csv(core_for_ggplot,"csv/choice_primary_core_for_ggplot.csv")

###########################################
###Then, plot those models

# Within-factor sorting: the row order is read from a saved file and applied
# to core_for_ggplot with reOrder().
#core_for_ggplot <- read.csv("csv/choice_primary_core_for_ggplot.csv")
global_order <- read.csv("data/core_for_ggplot_global.csv")
o.order <- global_order$iv_order

core_for_ggplot <- reOrder(x = core_for_ggplot, o = o.order)

# augment w/ scores bivariate coefficient || excluded ...

# ideology: vote choice on pty and scores, fit separately for each party-id
# subset, with SEs clustered on respondent
ideology_formula <- dv_choice ~ pty + scores

primary_elec_dems_ideo <- lm(ideology_formula, data = dems, weights = 1 / wt)
primary_elec_dems_ideo_clse <- clse.f(dems, primary_elec_dems_ideo, dems$respondent)

primary_elec_reps_ideo <- lm(ideology_formula, data = reps, weights = 1 / wt)
primary_elec_reps_ideo_clse <- clse.f(reps, primary_elec_reps_ideo, reps$respondent)

primary_elec_inds_ideo <- lm(ideology_formula, data = inds, weights = 1 / wt)
primary_elec_inds_ideo_clse <- clse.f(inds, primary_elec_inds_ideo, inds$respondent)

# Store the label columns as plain character vectors. They are addressed by
# name rather than by position (columns 1 and 8) so the step keeps working if
# the column layout changes.
core_for_ggplot$labels <- as.character(core_for_ggplot$labels)
#core_for_ggplot[,2]=as.character(core_for_ggplot[,2])
core_for_ggplot$iv_order <- as.character(core_for_ggplot$iv_order)
#core_for_ggplot=rbind(core_for_ggplot,c('i_ideology','Overall Ideology Rating',
	#primary_elec_dems_ideo_clse[2,1],primary_elec_dems_ideo_clse[2,2],
	#primary_elec_inds_ideo_clse[2,1],primary_elec_inds_ideo_clse[2,2],
	#primary_elec_reps_ideo_clse[2,1],primary_elec_reps_ideo_clse[2,2],
	#'Overall Ideology Rating'))

labels <- core_for_ggplot$labels

# Stack the three party-id blocks into long format, one row per label and
# party id. Columns are taken from core_for_ggplot explicitly (no attach(),
# so nothing is left on the search path), and the block size follows the
# data instead of a hard-coded 65.
n_rows <- nrow(core_for_ggplot)
ggplot_stacked <- data.frame(
  labels = rep(as.character(labels), 3),
  PID = rep(c("Democrat", "Independent", "Republican"), each = n_rows),
  estimate = c(core_for_ggplot$d_estimate,
               core_for_ggplot$i_estimate,
               core_for_ggplot$r_estimate),
  se = c(core_for_ggplot$d_se,
         core_for_ggplot$i_se,
         core_for_ggplot$r_se)
)

#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

# Independents are dropped here.
ggplot_stacked <- ggplot_stacked[ggplot_stacked$PID != "Independent", ]
#ggplot_stacked$labels <- factor(ggplot_stacked$labels,levels=ggplot_stacked$labels[order(ggplot_stacked$labels)])

# Force numeric columns; this matters if the character rbind() above is
# re-enabled, and is a no-op otherwise.
ggplot_stacked$estimate <- as.numeric(as.character(ggplot_stacked$estimate))
ggplot_stacked$se <- as.numeric(as.character(ggplot_stacked$se))

#exclude overall ideology
#ggplot_stacked=ggplot_stacked[-c(grep(as.character(ggplot_stacked[,1]),pattern='Overall')),]
ggplot_stacked$labels <- as.factor(as.character(ggplot_stacked$labels))
#write.csv(ggplot_stacked,"csv/choice_primary_ggplot_stacked.csv")

# Unique labels in order of first appearance; used for the plot axis limits.
labels <- unique(ggplot_stacked$labels)

# choice_primary.R
primary_ggplot_stacked <- ggplot_stacked

pd <- position_dodge(.5)

# 95% interval bounds (estimate +/- 1.96 * se), one entry per row of
# ggplot_stacked. The plots refer to these vectors from aes() below.
minci <- (ggplot_stacked$estimate - (1.96 * ggplot_stacked$se))
maxci <- (ggplot_stacked$estimate + (1.96 * ggplot_stacked$se))

# Put the number of unique respondents into the legend / facet labels.
ggplot_stacked$PID <- gsub(
  pattern = 'Democrat',
  replacement = paste0('Democrat (N=', length(unique(dems$respondent)), ')'),
  x = ggplot_stacked$PID
)
ggplot_stacked$PID <- gsub(
  pattern = 'Republican',
  replacement = paste0('Republican (N=', length(unique(reps$respondent)), ')'),
  x = ggplot_stacked$PID
)

# Everything the two figures have in common: points with error bars per
# label, coloured by party id, with dashed separator lines, axes flipped.
primary_plot_base <- ggplot(ggplot_stacked,
    aes(x = labels, y = estimate, colour = PID)) +
  scale_color_manual(values = c("blue4", "firebrick1")) +
  scale_x_discrete(limits = rev(labels)) +
  geom_hline(yintercept = 0, size = 0.5, color = "gray80") +
  geom_errorbar(width = .1, aes(ymin = minci, ymax = maxci), position = pd) +
  geom_point(shape = 21, size = 2, position = pd) +
  theme_bw() +
  theme(axis.text.y = element_text(hjust = 0, color = "gray40")) +
  theme(legend.position = c(-0.3, 0.935), legend.title = element_text()) +
  theme(legend.text = element_text(size = 10)) +
#  geom_vline(xintercept=0.5,size=0.5,linetype="dashed") +
  geom_vline(xintercept = c(20.5, 25.5, 38.5, 46.5, 56.5, 60.5, 63.5),
             size = 0.5, linetype = "dashed") +
  coord_flip() +
  labs(x = "Experimentally Manipulated Variable", y = "Change Pr(Vote Choice)")

# Write one plot to a 9 x 14 PDF.
# The plot is print()ed explicitly: a bare ggplot expression is only drawn by
# top-level auto-printing, which does not happen when this script is run via
# source(), and would leave an empty PDF. on.exit() closes the device even if
# drawing fails.
save_plot_pdf <- function(plot, file) {
  pdf(file, width = 9, height = 14, pointsize = 12)
  on.exit(dev.off(), add = TRUE)
  print(plot)
  invisible(file)
}

# Both party ids in one panel.
save_plot_pdf(
  primary_plot_base + ylim(-0.45, 0.45),
  "main/figures/choice_inparty_twoparty.pdf"
)

# One panel per party id, on a tighter y range.
save_plot_pdf(
  primary_plot_base + facet_wrap(~PID, ncol = 2) + ylim(-0.26, 0.26),
  "main/figures/choice_inparty_twoparty_separated.pdf"
)

# some interest in a zoom in result ....

# Record the sample sizes behind the in-party figures: number of rows and
# number of unique respondents for Republicans and Democrats.
write.csv(
  data.frame(
    n.rep = nrow(reps),
    n.rep.cluster = length(unique(reps$respondent)),
    n.dem = nrow(dems),
    n.dem.cluster = length(unique(dems$respondent))
  ),
  file = 'main/figures/choice_inparty_twoparty.csv'
)

# END
